******************************************************************************************
* copihsn.do
* Analysis IHSN database to produce Figures 1 and 2, and Table 1 from Demombynes & Sandefur (2014).
* Justin Sandefur
******************************************************************************************

drop _all
clear
set more off
if "`c(username)'" == "Justin" {
	global root 	= "/Mac/Dropbox/Copenhagen"
}
else {
	global root		= "..."						// To run, enter local file-path of Dropbox folder here
}
global rawdata 		= "$root/Data/Raw"
global processed 	= "$root/Data/Processed"
global dofiles 		= "$root/Data/Dofiles"
global figures 		= "$root/Paper/Latex/Figures"
cd "$dofiles"

*** Merge full IHSN catalog to only those with micro data. 
	insheet using "$rawdata/ihsn_open.csv", comma names clear
	gen open = 1
	tempfile open
	save `open'
	insheet using "$rawdata/ihsn_all.csv", comma names clear
	mmerge surveyid using `open'
	replace open = 0 if open==.
	
*** Country-year level data set
	rename data_coll_start year
	collapse (mean) open (count) surveys=open, by(nation year)
	count

*** Add WB country codes, regions, etc.
	replace nation = "Guinea.Bissau" 							if nation == "Guiné-Bissau"
	replace nation = "Serbia, Republic of" 						if nation == "Serbia and Montenegro"
	replace nation = "Sao Tome and Principe" 					if nation == "São Tomé and Principe"
	replace nation = "Cape Verde" 								if nation == "Cabo Verde"
	replace nation = "Congo, Dem. Rep." 						if nation == "République Démocratique du Congo"
	replace nation = "Cote d'Ivoire" 							if nation == "Côte d'Ivoire"
	replace nation = "Democratic People's Republic of Korea" 	if nation == "DPR Korea"
	mmerge nation using "$rawdata/codes_masterlist.dta", umatch(country) unmatched(master)
	tab nation if _merge==1							// Mostly lacking codes for global and regional surveys


*** Create data set with zeroes -- ie all country/year observations present
	preserve
		duplicates drop nation, force
		keep nation wb_region wb_ccode
		expand 35
		bysort nation: gen year = _n + 1979
		tempfile countryyear
		save `countryyear', replace
	restore
	mmerge nation year using `countryyear'
	replace open = 0 if open==.
	replace surveys = 0 if surveys==.
	
	
*** Merge in WDI indicators + Polity IV, WGI
	preserve
		insheet using "$rawdata/polity4.csv", clear names comma
		replace country = "Congo (Brazzaville)" if country == "Congo Brazzaville"
		replace country = "Congo Democratic Republic" if country == "Congo Kinshasa"
		replace country = "Timor-Leste" if country == "East Timor"
		replace country = "Myanmar" if country == "Myanmar (Burma)"
		replace country = "Trinidad and Tobago" if country == "Trinidad"
		replace country = "United Arab Emirates" if country == "UAE"
		replace country = "Bosnia & Herzegovina" if country == "Bosnia"
		mmerge country using "$rawdata/codes_masterlist.dta", unmatched(master) ukeep(wb_ccode)
		keep if year>=1980
		sort wb_ccode year
		tempfile polity
		save `polity', replace
		insheet using "$rawdata/wdi_net_oda.csv", clear names comma
		duplicates drop countrycode, force
		reshape long yr, i(countrycode) j(year)
		rename yr net_oda
		rename countrycode wb_ccode
		drop if wb_ccode==""
		destring net_oda, replace force
		mmerge wb_ccode year using "$rawdata/wdi_gni", umatch(ccode year) 
		mmerge wb_ccode year using "$rawdata/wdi_pop"
		mmerge wb_ccode year using "$rawdata/wgidataset.dta", umatch(CTRY YEAR)
		mmerge wb_ccode year using `polity'
		tempfile wdi
		save `wdi', replace
	restore
	mmerge wb_ccode year using `wdi'

************************
*** Regression Table ***
************************

	gen lgdp = ln(gdppc)
	gen lpop = ln(pop)
	gen laid = ln(net_oda)
	capture drop ccode
	encode wb_ccode, g(ccode)
	replace net_oda=. if net_oda>100 | net_oda<0
	replace democ = . if abs(democ)>20
	rename GE_EST goveff
	foreach var in goveff democ{
		sum `var'
		replace `var' = (`var' - r(mean))/r(sd)
	}
	tab year, g(dyear)
	label variable lpop 	"Log population"
	label variable lgdp 	"Log per capita GDP, PPP"
	label variable laid 	"Log aid share of gov. budget"
	label variable democ 	"Polity IV democracy index"
	label variable goveff 	"WGI gov. effectiveness index"
	

	eststo s1: 	reg surveys lpop lgdp 					dyear* if year>=1990, cluster(ccode)
				reg surveys lpop lgdp laid democ goveff dyear* if year>=1990, cluster(ccode)
	eststo s2: 	reg surveys lpop lgdp 					dyear* if e(sample)	, cluster(ccode)
	eststo s3: 	reg surveys lpop lgdp laid democ goveff dyear* if e(sample)	, cluster(ccode)
	eststo o1: 	reg open	lpop lgdp 					dyear* if year>=1990, cluster(ccode)
				reg open	lpop lgdp laid democ goveff dyear* if year>=1990, cluster(ccode)
	eststo o2: 	reg open	lpop lgdp 					dyear* if e(sample)	, cluster(ccode)
	eststo o3: 	reg open	lpop lgdp laid democ goveff dyear* if e(sample)	, cluster(ccode)

	esttab s1 s2 s3 o1 o2 o3 using "$figures/reg_ihsn.tex", 															///
		f booktabs eqlabels(none) label nodep noobs nonotes nolines 	order(lgdp lpop laid democ goveff)				///
		starlevels(* 0.10 ** 0.05 *** 0.01) se gaps replace nomtitles 	keep(lgdp lpop laid democ goveff)				///
		stats(N N_clust r2, fmt(0 0 2) labels("Observations" "Countries" "R-squared"))									///
		mgroups("Surveys per year" "Share of surveys open", pattern(1 0 0 1 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) 



*******************
*** Trend graph ***
*******************

*** Survey frequency over time by region
	do "/Mac/Macros/JCS Graph Colors.do"	// Purely cosmetic
	mycolors white
	preserve
		keep if year>=1980 & year<=2012
		collapse (mean) survey open, by(wb_region year)
		tokenize `""Surveys per country per year""Share of surveys in public domain""'
		local n = 1
		foreach var in survey open{
			tempvar temp
			gen `temp' = . 
			levelsof wb_region, local(regions)
			local i = 1
			foreach region of local regions{
				tempvar temp`i'
				lowess `var' year if wb_region=="`region'", nograph gen(`temp`i'')
				replace `temp' = `temp`i'' if wb_region=="`region'"
				local ++i
			}
			twoway 	(lowess `var'  year if wb_region=="East Asia & Pacific"			, lwidth(thick) lcolor(gs12))	///
	 				(lowess `var'  year if wb_region=="Europe & Central Asia"		, lwidth(thick) lcolor(gs12))	///
	 				(lowess `var'  year if wb_region=="Latin America & Caribbean"	, lwidth(thick) lcolor(gs12))	///
	 				(lowess `var'  year if wb_region=="Middle East & North Africa"	, lwidth(thick) lcolor(gs12))	///
	 				(lowess `var'  year if wb_region=="South Asia"					, lwidth(thick) lcolor(gs12))	///
					(lowess `var'  year if wb_region=="Sub-Saharan Africa"			, lwidth(thick) lcolor($c1))	///
	 				(scatter `temp' year if year==2012, mlabel(wb_region) mlabc(black) msymbol(none) mlabpos(3)),	///
	 				legend(order(1 "AFR" 2 "EAP" 3 "ECA" 4 "LAC" 5 "MNA" 6 "SAS"))									///
	 				legend(off) $myregion aspect(1) ylabel(, $myylabel) 											///
	 				xtitle("") ytitle("``n''")
	 		gr_edit .style.editstyle declared_xsize(7) editcopy
	 		graph export "$figures/lowess_`var'.pdf", as(pdf) replace
	 		local ++n
	 	}
	restore
	
exit



